In [1]:
import numpy as np
import pandas as pd
from copy import deepcopy
import matplotlib.pyplot as plt
import matplotlib as mpl
from scipy.stats import norm
from itertools import repeat
from numpy.random import normal
from scipy.stats import ttest_1samp
In [2]:
import warnings
warnings.filterwarnings('ignore')

Load and clean data

Load states and abbreviation

In [3]:
states = pd.read_excel("./messy_data/states.xlsx")
states.drop(states.index[states.Abbreviation == "DC"], inplace=True)
states.columns = ["Jurisdiction", "Jurisdiction Abbreviation"]
states = states.append({"Jurisdiction" : "Federal", "Jurisdiction Abbreviation" : "FED"}, ignore_index=True)

Load prisoner population data

In [4]:
malePrisonerPopulation = pd.read_excel("./messy_data/Prisoners under the jurisdiction of state or federal correctional authorities 1978-2016.xlsx", sheet_name="Male", header=9, nrows=54).dropna(1, "all").dropna(0)
femalePrisonerPopulation = pd.read_excel("./messy_data/Prisoners under the jurisdiction of state or federal correctional authorities 1978-2016.xlsx", sheet_name="Female", header=9, nrows=54).dropna(1, "all").dropna(0)

totalPrisonerPopulation = pd.read_excel("./messy_data/Prisoners under the jurisdiction of state or federal correctional authorities 1978-2016.xlsx", sheet_name="Total", header=9, nrows=54).dropna(1, "all").dropna(0)
In [5]:
def cleanPopulationTable(table, states):
    
    # add DC data to Federal, and delete it
    table.loc[table.Jurisdiction == "Federal"].iloc[:, 1:] = table.loc[table.Jurisdiction == "Federal"].iloc[:, 1:] + table.loc[table.Jurisdiction == "District of Columbia"].iloc[:, 1:].replace("--", 0)
    table.drop(table.index[table.Jurisdiction == "District of Columbia"], inplace=True)
    
    meltedTable = table.melt(id_vars = "Jurisdiction", var_name = "Year", value_name = "Population")
    
    table.set_index("Jurisdiction", inplace=True)
    table.columns = table.columns.astype(np.int)
    table.columns.name = "Year"
    table = table.astype(np.float)
    
    return table, meltedTable
In [6]:
malePrisonerPopulation, malePrisonerPopulation_melted = cleanPopulationTable(malePrisonerPopulation, states)
femalePrisonerPopulation, femalePrisonerPopulation_melted  = cleanPopulationTable(femalePrisonerPopulation, states)
totalPrisonerPopulation, totalPrisonerPopulation_melted = cleanPopulationTable(totalPrisonerPopulation, states)
In [7]:
totalPrisonerPopulation.head()
Out[7]:
Year 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 ... 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016
Jurisdiction
Federal 29803.0 26371.0 24363.0 28133.0 29673.0 31926.0 34263.0 40223.0 44408.0 48300.0 ... 199618.0 201280.0 208118.0 209771.0 216362.0 217815.0 215866.0 210567.0 196455.0 189192.0
Alabama 5625.0 5464.0 6543.0 7657.0 9233.0 9856.0 10482.0 11015.0 11710.0 12827.0 ... 29412.0 30508.0 31874.0 31764.0 32270.0 32431.0 32381.0 31771.0 30810.0 28883.0
Alaska 712.0 760.0 822.0 1024.0 1322.0 1631.0 1967.0 2329.0 2460.0 2528.0 ... 5167.0 5014.0 5285.0 5391.0 5597.0 5633.0 5081.0 5794.0 5338.0 4434.0
Arizona 3456.0 3749.0 4372.0 5223.0 6069.0 6889.0 7845.0 8531.0 9434.0 10948.0 ... 37746.0 39502.0 40544.0 40209.0 40020.0 40080.0 41177.0 42259.0 42719.0 42320.0
Arkansas 2654.0 3042.0 2911.0 3328.0 3922.0 4246.0 4482.0 4611.0 4701.0 5441.0 ... 14314.0 14716.0 15208.0 16204.0 16108.0 14654.0 17235.0 17874.0 17707.0 17537.0

5 rows × 39 columns

Load admission and release data

In [8]:
maleAdmission = pd.read_excel("./messy_data/admissions.xlsx", sheet_name="Male").dropna().replace("/", np.nan)
maleRelease = pd.read_excel("./messy_data/releases.xlsx", sheet_name="Male").dropna().replace("/", np.nan)

femaleAdmission = pd.read_excel("./messy_data/admissions.xlsx", sheet_name="Female").dropna().replace("/", np.nan)
femaleRelease = pd.read_excel("./messy_data/releases.xlsx", sheet_name="Female").dropna().replace("/", np.nan)

totalAdmission = pd.read_excel("./messy_data/admissions.xlsx", sheet_name="Total").dropna().replace("/", np.nan)
totalRelease = pd.read_excel("./messy_data/releases.xlsx", sheet_name="Total").dropna().replace("/", np.nan)
In [9]:
def cleanCountTable(table, states, tableType):
    
    table.replace([".", "/"], [np.nan, np.nan], inplace=True)
    
    for i, row in table.iterrows():
        if row.isnull().any():
            table.loc[i, 1 :] = pd.to_numeric(row.iloc[1 :]).interpolate()
    
    # add DC data to Federal, and delete it
    table.loc[table.Jurisdiction == "Federal"].iloc[:, 1:] = table.loc[table.Jurisdiction == "Federal"].iloc[:, 1:] + table.loc[table.Jurisdiction == "District of Columbia"].iloc[:, 1:].replace("--", 0)
    table.drop(table.index[table.Jurisdiction == "District of Columbia"], inplace=True)
    
    meltedTable = table.melt(id_vars = "Jurisdiction", var_name = "Year", value_name = tableType)
    
    table.set_index("Jurisdiction", inplace=True)
    table.columns = table.columns.astype(np.int)
    table.columns.name = "Year"
    table = table.astype(np.float)
    
    return table, meltedTable
In [10]:
maleAdmission, maleAdmission_melted = cleanCountTable(maleAdmission, states, "Admission")
maleRelease, maleRelease_melted = cleanCountTable(maleRelease, states, "Release")

femaleAdmission, femaleAdmission_melted = cleanCountTable(femaleAdmission, states, "Admission")
femaleRelease, femaleRelease_melted = cleanCountTable(femaleRelease, states, "Release")

totalAdmission, totalAdmission_melted = cleanCountTable(totalAdmission, states, "Admission")
totalRelease, totalRelease_melted = cleanCountTable(totalRelease, states, "Release")
In [11]:
totalAdmission.head()
Out[11]:
Year 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 ... 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016
Jurisdiction
Federal 14724.0 14120.0 12598.0 12830.0 14818.0 16745.0 16013.0 17916.0 18501.0 18709.0 ... 53618.000000 53662.000000 56153.000000 54121.0 60634.0 55938.0 53664.0 50865.0 46912.0 44682.0
Alabama 2572.0 2597.0 3766.0 4025.0 4425.0 4605.0 4701.0 4370.0 3962.0 4543.0 ... 10708.000000 11037.000000 13093.000000 11881.0 11387.0 11203.0 11265.0 10912.0 10451.0 10749.0
Alaska 258.0 311.0 459.0 461.0 541.0 711.0 727.0 875.0 1097.0 952.0 ... 2716.428571 2694.285714 2672.142857 2650.0 3789.0 3906.0 3906.0 3846.0 4271.0 1804.0
Arizona 1620.0 1641.0 2082.0 2759.0 2910.0 3288.0 3386.0 3989.0 4515.0 5370.0 ... 14046.000000 14867.000000 14526.000000 13249.0 13030.0 12970.0 13538.0 14439.0 14670.0 13663.0
Arkansas 1958.0 2189.0 2311.0 2419.0 2323.0 2173.0 2179.0 2301.0 2280.0 3152.0 ... 6651.000000 7017.000000 7383.000000 7603.0 7059.0 5782.0 8987.0 9435.0 9351.0 9911.0

5 rows × 39 columns

In [12]:
totalRelease.head()
Out[12]:
Year 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 ... 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016
Jurisdiction
Federal 17414.0 18584.0 14788.0 11773.0 13420.0 14484.0 15108.0 13497.0 15203.0 16123.0 ... 48764.0 52348.0 50720.0 52487.0 55239.0 56037.0 54785.0 54529.0 60156.0 52035.0
Alabama 2726.0 2744.0 3207.0 2908.0 2830.0 3225.0 3861.0 3694.0 3197.0 3480.0 ... 11079.0 11556.0 12231.0 12070.0 11052.0 11253.0 11488.0 11585.0 11446.0 12711.0
Alaska 235.0 216.0 268.0 271.0 358.0 505.0 501.0 620.0 960.0 892.0 ... 3286.0 3741.0 3196.0 3068.0 3599.0 3774.0 3774.0 3774.0 4085.0 2159.0
Arizona 1352.0 1638.0 1469.0 1874.0 2027.0 2243.0 2506.0 3354.0 3647.0 3795.0 ... 12560.0 13192.0 13854.0 13500.0 13149.0 13000.0 12931.0 13513.0 14092.0 13857.0
Arkansas 1878.0 1872.0 2366.0 2045.0 1724.0 1893.0 1953.0 2168.0 2189.0 2411.0 ... 6045.0 6610.0 6990.0 6664.0 7252.0 6298.0 6541.0 8812.0 9702.0 10370.0

5 rows × 39 columns

Load regions and divisions

Aggregate and sum population, admission and release according to regions and divisions

In [13]:
regions = pd.read_excel("./messy_data/state_region.xlsx")
regions.drop(regions.index[regions.State == "District of Columbia"], inplace=True)
In [14]:
def aggregateRegionalPopulationSum(meltedTable, regions):
    
    meltedTable = regions.merge(meltedTable, left_on="State", right_on="Jurisdiction", how="right")
    temp_index = meltedTable["Jurisdiction"] == "Federal"
    meltedTable.loc[temp_index, "Region"] = "Federal"
    meltedTable.loc[temp_index, "Division"] = "Federal"
    
    regionSum = meltedTable.groupby(["Year", "Region"]).Population.sum().unstack(0)
    divisionSum = meltedTable.groupby(["Year", "Region", "Division"]).Population.sum().unstack(0)
    
    return regionSum, divisionSum
In [15]:
malePrisonerPopulationRegionSum, malePrisonerPopulationDivisionSum = aggregateRegionalPopulationSum(malePrisonerPopulation_melted, regions)
femalePrisonerPopulationRegionSum, femalePrisonerPopulationDivisionSum = aggregateRegionalPopulationSum(femalePrisonerPopulation_melted, regions)
totalPrisonerPopulationRegionSum, totalPrisonerPopulationDivisionSum = aggregateRegionalPopulationSum(totalPrisonerPopulation_melted, regions)
In [16]:
totalPrisonerPopulationRegionSum
Out[16]:
Year 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 ... 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016
Region
Federal 29803.0 26371.0 24363.0 28133.0 29673.0 31926.0 34263.0 40223.0 44408.0 48300.0 ... 199618.0 201280.0 208118.0 209771.0 216362.0 217815.0 215866.0 210567.0 196455.0 189192.0
Midwest 61657.0 63615.0 66211.0 73216.0 78312.0 81853.0 85664.0 95577.0 103011.0 111123.0 ... 263039.0 264314.0 261615.0 263485.0 262544.0 264758.0 267095.0 267116.0 263996.0 258805.0
Northeast 42236.0 43726.0 45796.0 53908.0 59325.0 64640.0 70498.0 75591.0 82364.0 91210.0 ... 179001.0 178642.0 177361.0 173912.0 170944.0 167648.0 166567.0 162694.0 157986.0 154166.0
South 128865.0 133455.0 143213.0 156038.0 177370.0 181395.0 187744.0 195950.0 207673.0 215180.0 ... 638228.0 646030.0 651504.0 654653.0 653305.0 641155.0 644421.0 637309.0 630236.0 624803.0
West 41851.0 44317.0 47093.0 55156.0 65045.0 72697.0 79312.0 88762.0 100898.0 111626.0 ... 316949.0 318016.0 316889.0 311982.0 295813.0 279021.0 283001.0 284633.0 277930.0 278431.0

5 rows × 39 columns

In [17]:
totalPrisonerPopulationDivisionSum
Out[17]:
Year 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 ... 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016
Region Division
Federal Federal 29803.0 26371.0 24363.0 28133.0 29673.0 31926.0 34263.0 40223.0 44408.0 48300.0 ... 199618.0 201280.0 208118.0 209771.0 216362.0 217815.0 215866.0 210567.0 196455.0 189192.0
Midwest East North Central 47664.0 49641.0 51175.0 56860.0 60027.0 62253.0 64639.0 72734.0 78572.0 84939.0 ... 197054.0 197599.0 194218.0 195052.0 193894.0 195291.0 196525.0 195055.0 191431.0 185877.0
West North Central 13993.0 13974.0 15036.0 16356.0 18285.0 19600.0 21025.0 22843.0 24439.0 26184.0 ... 65985.0 66715.0 67397.0 68433.0 68650.0 69467.0 70570.0 72061.0 72565.0 72928.0
Northeast Mid-Atlantic 34142.0 34480.0 35870.0 41870.0 46644.0 51469.0 56562.0 60069.0 65670.0 73078.0 ... 135416.0 135515.0 135498.0 132927.0 130848.0 128560.0 127424.0 124802.0 122074.0 119746.0
New England 8094.0 9246.0 9926.0 12038.0 12681.0 13171.0 13936.0 15522.0 16694.0 18132.0 ... 43585.0 43127.0 41863.0 40985.0 40096.0 39088.0 39143.0 37892.0 35912.0 34420.0
South East South Central 17746.0 19292.0 21055.0 24171.0 26617.0 28416.0 28724.0 29515.0 31336.0 33783.0 ... 100567.0 102196.0 101959.0 100826.0 103680.0 105271.0 103901.0 100990.0 99594.0 99300.0
South Atlantic 72413.0 72731.0 75670.0 82341.0 93252.0 93182.0 96325.0 102072.0 109094.0 112121.0 ... 289414.0 293571.0 298505.0 298277.0 295606.0 289461.0 288159.0 286722.0 284102.0 281710.0
West South Central 38706.0 41432.0 46488.0 49526.0 57501.0 59797.0 62695.0 64363.0 67243.0 69276.0 ... 248247.0 250263.0 251040.0 255550.0 254019.0 246423.0 252361.0 249597.0 246540.0 243793.0
West Mountain 11721.0 12548.0 13141.0 15092.0 17458.0 19383.0 21138.0 22869.0 25401.0 28271.0 ... 99802.0 101284.0 101802.0 102506.0 102253.0 100912.0 102806.0 103693.0 103591.0 103735.0
Pacific 30130.0 31769.0 33952.0 40064.0 47587.0 53314.0 58174.0 65893.0 75497.0 83355.0 ... 217147.0 216732.0 215087.0 209476.0 193560.0 178109.0 180195.0 180940.0 174339.0 174696.0

10 rows × 39 columns

In [18]:
def aggregateRegionalCountSum(meltedTable, regions, tableType):
    
    meltedTable = regions.merge(meltedTable, left_on="State", right_on="Jurisdiction", how="right")
    temp_index = meltedTable["Jurisdiction"] == "Federal"
    meltedTable.loc[temp_index, "Region"] = "Federal"
    meltedTable.loc[temp_index, "Division"] = "Federal"
    
    regionSum = meltedTable.groupby(["Year", "Region"])[tableType].sum().unstack(0)
    divisionSum = meltedTable.groupby(["Year", "Region", "Division"])[tableType].sum().unstack(0)
    
    return regionSum, divisionSum
In [19]:
maleAdmissionRegionSum, maleAdmissionDivisionSum = aggregateRegionalCountSum(maleAdmission_melted, regions, "Admission")
maleReleaseRegionSum, maleReleaseDivisionSum = aggregateRegionalCountSum(maleRelease_melted, regions, "Release")

femaleAdmissionRegionSum, femaleAdmissionDivisionSum = aggregateRegionalCountSum(femaleAdmission_melted, regions, "Admission")
femaleReleaseRegionSum, femaleReleaseDivisionSum = aggregateRegionalCountSum(femaleRelease_melted, regions, "Release")

totalAdmissionRegionSum, totalAdmissionDivisionSum = aggregateRegionalCountSum(totalAdmission_melted, regions, "Admission")
totalReleaseRegionSum, totalReleaseDivisionSum = aggregateRegionalCountSum(femaleRelease_melted, regions, "Release")

Load capacity data

In [20]:
capacity = pd.read_excel("./messy_data/facility capacity 2016.xlsx").replace(["…", "/"], np.nan)
capacity = capacity.set_index("Jurisdiction").max(axis=1)
capacity.dropna(inplace=True)
capacity = capacity.astype(np.float)
capacity.name = "Capacity"
capacity.head()
Out[20]:
Jurisdiction
Federal     135334.0
Alabama      25784.0
Alaska        4838.0
Arizona      44831.0
Arkansas     16449.0
Name: Capacity, dtype: float64
In [21]:
capacitySum = regions.merge(capacity, left_on="State",right_on="Jurisdiction")
capacitySum.columns = ["Jurisdiction", "Division", "Region", "Capacity"]
capacitySum.iloc[-1] = ["Federal", "Federal", "Federal", capacity["Federal"]]
capacitySum = capacitySum[["Region", "Division", "Jurisdiction", "Capacity"]]
In [22]:
capacityRegionSum = capacitySum.groupby(["Region"]).Capacity.sum()
capacityDivisionSum = capacitySum.groupby(["Region", "Division"]).Capacity.sum()
In [23]:
capacityRegionSum
Out[23]:
Region
Federal      135334.0
Midwest      194841.0
Northeast    145842.0
South        561824.0
West         248834.0
Name: Capacity, dtype: float64
In [24]:
capacityDivisionSum
Out[24]:
Region     Division          
Federal    Federal               135334.0
Midwest    East North Central    126930.0
           West North Central     67911.0
Northeast  Mid-Atlantic          122113.0
           New England            23729.0
South      East South Central     77048.0
           South Atlantic        270211.0
           West South Central    214565.0
West       Mountain               98390.0
           Pacific               150444.0
Name: Capacity, dtype: float64

Load average sentencing data

In [25]:
annualSentence = pd.read_excel("./messy_data/national sentencing.xlsx").iloc[:, [0,1]]
annualSentence.columns = ["Year", "Sentence"]
annualSentence.set_index("Year", inplace=True)
annualSentence = annualSentence.iloc[:-1,0]
In [26]:
annualSentence.plot()
plt.xticks(np.array(range(1995, 2017, 3)))
plt.ylabel("Sentence [Year]")
plt.title("Average National Sentencing")
Out[26]:
Text(0.5, 1.0, 'Average National Sentencing')

Forecast model

Given data of annual population $p[t]$, admission $a[t]$ and release $r[t]$, we can model the yearly dynamics of prisoner population with a first degree difference stochastic process $$p[t] - p[t - 1] = a[t] - r[t] + \varepsilon p[t - 1], \varepsilon \sim N(0,\delta^2 )$$ where $\varepsilon p[t - 1]$ is the Gaussian error term and $\varepsilon$ indicates the error is proportional to the previous population. $\varepsilon$ should be zero-centered with a variance of $\delta^2$.

Every year it is assumed that any prisoner's remaining sentence to serve is also a random variable $s[t]$ which is normally distributed. The center of that distribution is year-dependent and denoted $\mu [t]$, while the variance $\sigma^2$ is assumed to be invariate. The annual release can be approximated with such distribution's cumulative distribution function $$r[t] = F(1;\mu [t - 1],{\sigma ^2})p[t - 1]$$ $F(1)$ represents the proportion of prisoners whose remaining sentence is less than 1 year.

A second degree difference equation is used to describe the temporal evolution of the combined remaining sentence of the whole prisoner body $$p[t]\mu [t] = p[t - 1](\mu [t - 1] - 1) + 0.5r[t] + a[t]y[t] - 0.5a[t]$$ $p[t - 1](\mu [t - 1] - 1)$ represents the previous year's population's average number of remaining years to serve reduces for by 1 year. However, because $r[t]$ prisoners are being released, 1 year of decrease is an over-estimation. On average, for each prisoner released, a half-year over-estimation is introduced. Thus, it is compensated by the $+0.5r[t]$ term given the prisoners are released evenly throughout the year. $y[t]$ is the average sentence of the admitted prisoner of that year. $a[t]y[t]$ represents the contribution of the newly admitted prisoners. Similarly, for each newly admitted prisoner, because the individual will have served some time by the end of the year, we introduced a half-year over-estimation, which is corrected by the $-0.5a[t]$ term.

In order to make projection of the future prisoner population, unknown parameters and latent variables need to be estimated first. Given past anual survey of the prisoner population, admission and release, equation $p[t] - p[t - 1] = a[t] - r[t] + \varepsilon p[t - 1]$ can be used to estimate the variance of $\varepsilon$. Provided the average sentence of the newly admitted and the initial state of $\mu [t]$ ($\mu [1994]$ due to the lack of anual sentencing data prior to 1995), equation $p[t]\mu [t] = p[t - 1](\mu [t - 1] - 1) + 0.5r[t] + a[t]y[t] - 0.5a[t]$ can be used to popogate the state of the system and resolve $\mu [t]$. After that, $r[t] = F(1;\mu [t - 1],{\sigma ^2})p[t - 1]$ is used to approximate $\sigma$ for each corresponding year. The results are then averaged to generate a point estimation of $\sigma$.

To make forecast, first, a normal distribution is built based on past admission data to sample future admission. A similar distribution is also built for the past sentencing data to generate a sampler for future sentencing. Since the system state is known for up until 2016, equation $r[t] = F(1;\mu [t - 1],{\sigma ^2})p[t - 1]$ can be used to determine the next year's release while equation $p[t] - p[t - 1] = a[t] - r[t] + \varepsilon p[t - 1]$ can be used to sample for the following year's prisoner population. Finally, using equation $p[t]\mu [t] = p[t - 1](\mu [t - 1] - 1) + 0.5r[t] + a[t]y[t] - 0.5a[t]$ to propogate $\mu [t]$, one full iteration of state evolution is completed.

In [27]:
def extractAreaPopulationCount(table, area, tableType="Population", year_begin=1978,year_end=2016):
    # Extract a series of population, admission or release data for an area (state, region or division)
    # area is a pandas index
    
    data = table.loc[area]
         
    data = data[np.logical_and(data.index <= year_end, data.index >= year_begin)]
    data.name = tableType
    data.index.name="Year"
    
    return data
In [28]:
def propogateMu(past_mu,past_p, current_p, current_a,current_r, current_y):
    # State propogation function derived from the equilibrium of population * mu
    
    return (past_p * (past_mu - 1) + 0.5 * current_r + current_a * (current_y - 0.5)) / current_p
In [29]:
def solveMu(initialMu,initialPopulation, population, admission,release, anualSentence):
    # Given the initial state of mu and populatin,
    # this function can calculate the evolution of mu
    # according to the revolution of population, admission, release, and average sentence
    
    mu = population * 0
    mu.name = "Mu"

    for i, p in enumerate(population):
    
        if i == 0:
            past_p = initialPopulation
            past_mu = initialMu

        mu.iloc[i] = propogateMu(past_mu, past_p, population.iloc[i], admission.iloc[i],release.iloc[i], anualSentence.iloc[i])
        
        past_mu = mu.iloc[i]
        past_p = population.iloc[i]
        
    mu[1994] = initialMu
    mu = mu.sort_index()
    
    return mu
In [30]:
def estimateSigmaOfNormalCDF(x,F, mu):
    # Given x and F -- CDF at x of a normal distribution with center of mu,
    # this function estimate the square root of variance
    
    # This function can take vetor input
    
    # we have function F(x) as the CDF of N(mu, sigma^2)
    # given function Phi(x) as the CDF of N(0, 1)
    # F(x) = Phi((x - mu) / sigma)
    
    # the reverse function of Phi is norm.ppf()
    return (x - mu) / norm.ppf(F) 
In [31]:
def estimateSigma(population, release, mu): 
    
    mu = mu.values[: -1]
    p = population.values[: -1]
    r = release.values

    F = r / p
    sigma = estimateSigmaOfNormalCDF(1,F, mu)
    sigma = sigma[sigma >= 0]
    
    return sigma.mean()
In [32]:
def sampleFutureAdmissionSentence(sampleMean,sampleSD, year_end, dataType):
    
    years = range(2017, year_end + 1)
    simulated = normal(sampleMean,sampleSD, len(years))
    simulated[simulated <= 0.] = sampleMean
    if dataType == "Admission":
        simulated = np.round(simulated)
    
    simulated = pd.Series(simulated, index=years, name=dataType)
    simulated.index.name = "Year"
    
    return simulated
In [33]:
def propogateState(past_mu,past_p, current_a,current_y, epsilon_sd, sigma):
    # State propogation function
    # This function estimates release as PDF of a normal distribution with mu as the center.
    # The population was propogated as increment of difference between admission and release, plus proportional error
    
    if np.isnan(sigma):
        current_r = (- past_p + current_a * (current_y - 0.5 + past_mu)) / (past_mu - 0.5)
        # This dynamics tries to stablize mu
    else:
        current_r = np.round(norm(past_mu, sigma).cdf(1) * past_p)
    current_p = np.round(past_p + current_a - current_r + normal(scale=epsilon_sd) * past_p)
    current_mu = propogateMu(past_mu,past_p, current_p, current_a,current_r, current_y)
    
    return current_mu, current_p, current_r
In [34]:
def solvePopulation(initialMu,initialPopulation, futureAdmission,futureSentence, epsilon_sd, sigma):
    
    futurePopulation = futureAdmission * 0
    futurePopulation.name = "Population"
    
    futureRelease = futureAdmission * 0
    futureRelease.name = "Release"
    
    futureMu = futureSentence * 0
    futureMu.name = "Mu"
    
    for i, p in enumerate(futurePopulation):
        
        if i == 0:
            past_mu = initialMu
            past_p = initialPopulation
            
        futureMu.iloc[i], futurePopulation.iloc[i], futureRelease.iloc[i] = propogateState(past_mu,past_p, futureAdmission.iloc[i],futureSentence.iloc[i], epsilon_sd, sigma)
        
        past_mu = futureMu.iloc[i]
        past_p = futurePopulation.iloc[i]
        
    return futureMu, futurePopulation, futureRelease
In [35]:
def modelAndProject_NSamples_OneArea(populationTable, admissionTable,releaseTable, area, annualSentence, initialMu, projectionYear, N):
    
    # Note: the population, admission and release data range -- 1978-2016
    # the anual sentencing data range -- 1995-2016
    # N is the number of samples to draw for prediction
    # initialMu is the mu for 1994
    # projectionYear is the end of projection

    
    # Estimate error term of population increment
    
    population = extractAreaPopulationCount(populationTable, area, "Population")
    admission = extractAreaPopulationCount(admissionTable, area, "Admission")
    release = extractAreaPopulationCount(releaseTable, area, "Release")
    # data range 1978-2017
    
    epsilon = (population.diff() - admission + release).values[1:] / population.values[:-1]
    epsilon_sd = epsilon.std()
    
    # Model state propogation
    
    modernPopulation = population.loc[1995 :]
    modernAdmission = admission.loc[1995 :]
    modernRelease = release.loc[1995 :]
    # data range 1995-2016
    
    # estimate mu[t]
    initialPopulation = population[1994] # use 1994 for initial population
    mu = solveMu(initialMu,initialPopulation, modernPopulation, modernAdmission,modernRelease, annualSentence)
    # solveMu already concates initial state of mu to the output
    
    # estimate sigma
    modernPopulation = population.loc[1994 :]
    # include the initial state of population

    sigma = estimateSigma(modernPopulation, modernRelease, mu)
    
    # Project to from 2017 to projectionYear
    
    # Model and sample N sets of future admission and sentence
    
    admissionSD = modernAdmission.std()
    futureAdmissions = [sampleFutureAdmissionSentence(modernAdmission.iloc[-1],admissionSD, 2050, "Admission") for i in range(N)]

    sentenceSD = annualSentence.std()
    futureSentences = [sampleFutureAdmissionSentence(annualSentence.iloc[-1],sentenceSD, 2050, "Sentence") for i in range(N)]
    
    # Project future mu, population and release
    
    futureMus, futurePopulations, futureReleases = zip(*map(solvePopulation, repeat(mu.iloc[-1]),repeat(modernPopulation.iloc[-1]), futureAdmissions,futureSentences, repeat(epsilon_sd), repeat(sigma)))
    
    futurePopulations = pd.concat(futurePopulations, axis=1)
    futureAdmissions = pd.concat(futureAdmissions, axis=1)
    futureReleases = pd.concat(futureReleases, axis=1)
    
    return futurePopulations, futureAdmissions,futureReleases

Projection of prisoner population

In [36]:
def collapseSamples(sampleTable, area):
    
    sampleMean = sampleTable.mean(axis=1)
    sampleMean.name = area
    
    sampleSD = sampleTable.std(axis=1)
    sampleSD.name = area
    
    return sampleMean, sampleSD
    
In [37]:
def modelAndProjectPopulation_NSamples(populationTable, admissionTable,releaseTable, annualSentence, initialMu, projectionYear, N):
    
    areas = populationTable.index
    
    futurePopulationMeans = []
    futurePopulationSDs = []
    
    futureAdmissionMeans = []
    futureAdmissionSDs = []
    
    futureReleaseMeans = []
    futureReleaseSDs = []
    
    for area in areas:
        
        futurePopulations, futureAdmissions,futureReleases = modelAndProject_NSamples_OneArea(populationTable, admissionTable,releaseTable, area, annualSentence, initialMu, projectionYear, N)
        
        futurePopulationMean, futurePopulationSD = collapseSamples(futurePopulations, area)
        futurePopulationMeans.append(futurePopulationMean)
        futurePopulationSDs.append(futurePopulationSD)
        
        futureAdmissionMean, futureAdmissionSD = collapseSamples(futureAdmissions, area)
        futureAdmissionMeans.append(futureAdmissionMean)
        futureAdmissionSDs.append(futureAdmissionSD)
        
        futureReleaseMean, futureReleaseSD = collapseSamples(futureReleases, area)
        futureReleaseMeans.append(futureReleaseMean)
        futureReleaseSDs.append(futureReleaseSD)
        
    futurePopulationMeans = pd.concat(futurePopulationMeans, axis=1).transpose()
    futureAdmissionMeans = pd.concat(futureAdmissionMeans, axis=1).transpose()
    futureReleaseMeans = pd.concat(futureReleaseMeans, axis=1).transpose()
    
    futurePopulationSDs = pd.concat(futurePopulationSDs, axis=1).transpose()
    futureAdmissionSDs = pd.concat(futureAdmissionSDs, axis=1).transpose()
    futureReleaseSDs = pd.concat(futureReleaseSDs, axis=1).transpose()
        
    return futurePopulationMeans,futurePopulationSDs, futureAdmissionMeans,futureAdmissionSDs, futureReleaseMeans,futureReleaseSDs
In [38]:
plt.rcParams["figure.figsize"] = (10,5)
In [39]:
maleFuturePopulationMean_5,maleFuturePopulationSD_5, maleFutureAdmissionMean_5,maleFutureAdmissionSD_5, maleFutureReleaseMean_5,maleFutureReleaseSD_5 = modelAndProjectPopulation_NSamples(malePrisonerPopulation, maleAdmission,maleRelease, annualSentence, 5, 2050, 250)
maleFuturePopulationMean_10,maleFuturePopulationSD_10, maleFutureAdmissionMean_10,maleFutureAdmissionSD_10, maleFutureReleaseMean_10,maleFutureReleaseSD_10 = modelAndProjectPopulation_NSamples(malePrisonerPopulation, maleAdmission,maleRelease, annualSentence, 10, 2050, 250)
maleFuturePopulationMean_20,maleFuturePopulationSD_20, maleFutureAdmissionMean_20,maleFutureAdmissionSD_20, maleFutureReleaseMean_20,maleFutureReleaseSD_20 = modelAndProjectPopulation_NSamples(malePrisonerPopulation, maleAdmission,maleRelease, annualSentence, 20, 2050, 250)
maleFuturePopulationMean_40,maleFuturePopulationSD_40, maleFutureAdmissionMean_40,maleFutureAdmissionSD_40, maleFutureReleaseMean_40,maleFutureReleaseSD_40 = modelAndProjectPopulation_NSamples(malePrisonerPopulation, maleAdmission,maleRelease, annualSentence, 40, 2050, 250)

femaleFuturePopulationMean_5,femaleFuturePopulationSD_5, femaleFutureAdmissionMean_5,femaleFutureAdmissionSD_5, femaleFutureReleaseMean_5,femaleFutureReleaseSD_5 = modelAndProjectPopulation_NSamples(femalePrisonerPopulation, femaleAdmission,femaleRelease, annualSentence, 5, 2050, 250)
femaleFuturePopulationMean_10,femaleFuturePopulationSD_10, femaleFutureAdmissionMean_10,femaleFutureAdmissionSD_10, femaleFutureReleaseMean_10,femaleFutureReleaseSD_10 = modelAndProjectPopulation_NSamples(femalePrisonerPopulation, femaleAdmission,femaleRelease, annualSentence, 10, 2050, 250)
femaleFuturePopulationMean_20,femaleFuturePopulationSD_20, femaleFutureAdmissionMean_20,femaleFutureAdmissionSD_20, femaleFutureReleaseMean_20,femaleFutureReleaseSD_20 = modelAndProjectPopulation_NSamples(femalePrisonerPopulation, femaleAdmission,femaleRelease, annualSentence, 20, 2050, 250)
femaleFuturePopulationMean_40,femaleFuturePopulationSD_40, femaleFutureAdmissionMean_40,femaleFutureAdmissionSD_40, femaleFutureReleaseMean_40,femaleFutureReleaseSD_40 = modelAndProjectPopulation_NSamples(femalePrisonerPopulation, femaleAdmission,femaleRelease, annualSentence, 40, 2050, 250)
In [40]:
maleFuturePopulationRegionSumMean_5,maleFuturePopulationRegionSumSD_5, maleFutureAdmissionRegionSumMean_5,maleFutureAdmissionRegionSumSD_5, maleFutureReleaseRegionSumMean_5,maleFutureReleaseRegionSumSD_5 = modelAndProjectPopulation_NSamples(malePrisonerPopulationRegionSum, maleAdmissionRegionSum,maleReleaseRegionSum, annualSentence, 5, 2050, 250)
maleFuturePopulationRegionSumMean_10,maleFuturePopulationRegionSumSD_10, maleFutureAdmissionRegionSumMean_10,maleFutureAdmissionRegionSumSD_10, maleFutureReleaseRegionSumMean_10,maleFutureReleaseRegionSumSD_10 = modelAndProjectPopulation_NSamples(malePrisonerPopulationRegionSum, maleAdmissionRegionSum,maleReleaseRegionSum, annualSentence, 10, 2050, 250)
maleFuturePopulationRegionSumMean_20,maleFuturePopulationRegionSumSD_20, maleFutureAdmissionRegionSumMean_20,maleFutureAdmissionRegionSumSD_20, maleFutureReleaseRegionSumMean_20,maleFutureReleaseRegionSumSD_20 = modelAndProjectPopulation_NSamples(malePrisonerPopulationRegionSum, maleAdmissionRegionSum,maleReleaseRegionSum, annualSentence, 20, 2050, 250)
maleFuturePopulationRegionSumMean_40,maleFuturePopulationRegionSumSD_40, maleFutureAdmissionRegionSumMean_40,maleFutureAdmissionRegionSumSD_40, maleFutureReleaseRegionSumMean_40,maleFutureReleaseRegionSumSD_40 = modelAndProjectPopulation_NSamples(malePrisonerPopulationRegionSum, maleAdmissionRegionSum,maleReleaseRegionSum, annualSentence, 40, 2050, 250)

femaleFuturePopulationRegionSumMean_5,femaleFuturePopulationRegionSumSD_5, femaleFutureAdmissionRegionSumMean_5,femaleFutureAdmissionRegionSumSD_5, femaleFutureReleaseRegionSumMean_5,femaleFutureReleaseRegionSumSD_5 = modelAndProjectPopulation_NSamples(femalePrisonerPopulationRegionSum, femaleAdmissionRegionSum,femaleReleaseRegionSum, annualSentence, 5, 2050, 250)
femaleFuturePopulationRegionSumMean_10,femaleFuturePopulationRegionSumSD_10, femaleFutureAdmissionRegionSumMean_10,femaleFutureAdmissionRegionSumSD_10, femaleFutureReleaseRegionSumMean_10,femaleFutureReleaseRegionSumSD_10 = modelAndProjectPopulation_NSamples(femalePrisonerPopulationRegionSum, femaleAdmissionRegionSum,femaleReleaseRegionSum, annualSentence, 10, 2050, 250)
femaleFuturePopulationRegionSumMean_20,femaleFuturePopulationRegionSumSD_20, femaleFutureAdmissionRegionSumMean_20,femaleFutureAdmissionRegionSumSD_20, femaleFutureReleaseRegionSumMean_20,femaleFutureReleaseRegionSumSD_20 = modelAndProjectPopulation_NSamples(femalePrisonerPopulationRegionSum, femaleAdmissionRegionSum,femaleReleaseRegionSum, annualSentence, 20, 2050, 250)
femaleFuturePopulationRegionSumMean_40,femaleFuturePopulationRegionSumSD_40, femaleFutureAdmissionRegionSumMean_40,femaleFutureAdmissionRegionSumSD_40, femaleFutureReleaseRegionSumMean_40,femaleFutureReleaseRegionSumSD_40 = modelAndProjectPopulation_NSamples(femalePrisonerPopulationRegionSum, femaleAdmissionRegionSum,femaleReleaseRegionSum, annualSentence, 40, 2050, 250)
In [41]:
maleFuturePopulationDivisionSumMean_5,maleFuturePopulationDivisionSumSD_5, maleFutureAdmissionDivisionSumMean_5,maleFutureAdmissionDivisionSumSD_5, maleFutureReleaseDivisionSumMean_5,maleFutureReleaseDivisionSumSD_5 = modelAndProjectPopulation_NSamples(malePrisonerPopulationDivisionSum, maleAdmissionDivisionSum,maleReleaseDivisionSum, annualSentence, 5, 2050, 250)
maleFuturePopulationDivisionSumMean_10,maleFuturePopulationDivisionSumSD_10, maleFutureAdmissionDivisionSumMean_10,maleFutureAdmissionDivisionSumSD_10, maleFutureReleaseDivisionSumMean_10,maleFutureReleaseDivisionSumSD_10 = modelAndProjectPopulation_NSamples(malePrisonerPopulationDivisionSum, maleAdmissionDivisionSum,maleReleaseDivisionSum, annualSentence, 10, 2050, 250)
maleFuturePopulationDivisionSumMean_20,maleFuturePopulationDivisionSumSD_20, maleFutureAdmissionDivisionSumMean_20,maleFutureAdmissionDivisionSumSD_20, maleFutureReleaseDivisionSumMean_20,maleFutureReleaseDivisionSumSD_20 = modelAndProjectPopulation_NSamples(malePrisonerPopulationDivisionSum, maleAdmissionDivisionSum,maleReleaseDivisionSum, annualSentence, 20, 2050, 250)
maleFuturePopulationDivisionSumMean_40,maleFuturePopulationDivisionSumSD_40, maleFutureAdmissionDivisionSumMean_40,maleFutureAdmissionDivisionSumSD_40, maleFutureReleaseDivisionSumMean_40,maleFutureReleaseDivisionSumSD_40 = modelAndProjectPopulation_NSamples(malePrisonerPopulationDivisionSum, maleAdmissionDivisionSum,maleReleaseDivisionSum, annualSentence, 40, 2050, 250)

femaleFuturePopulationDivisionSumMean_5,femaleFuturePopulationDivisionSumSD_5, femaleFutureAdmissionDivisionSumMean_5,femaleFutureAdmissionDivisionSumSD_5, femaleFutureReleaseDivisionSumMean_5,femaleFutureReleaseDivisionSumSD_5 = modelAndProjectPopulation_NSamples(femalePrisonerPopulationDivisionSum, femaleAdmissionDivisionSum,femaleReleaseDivisionSum, annualSentence, 5, 2050, 250)
femaleFuturePopulationDivisionSumMean_10,femaleFuturePopulationDivisionSumSD_10, femaleFutureAdmissionDivisionSumMean_10,femaleFutureAdmissionDivisionSumSD_10, femaleFutureReleaseDivisionSumMean_10,femaleFutureReleaseDivisionSumSD_10 = modelAndProjectPopulation_NSamples(femalePrisonerPopulationDivisionSum, femaleAdmissionDivisionSum,femaleReleaseDivisionSum, annualSentence, 10, 2050, 250)
femaleFuturePopulationDivisionSumMean_20,femaleFuturePopulationDivisionSumSD_20, femaleFutureAdmissionDivisionSumMean_20,femaleFutureAdmissionDivisionSumSD_20, femaleFutureReleaseDivisionSumMean_20,femaleFutureReleaseDivisionSumSD_20 = modelAndProjectPopulation_NSamples(femalePrisonerPopulationDivisionSum, femaleAdmissionDivisionSum,femaleReleaseDivisionSum, annualSentence, 20, 2050, 250)
femaleFuturePopulationDivisionSumMean_40,femaleFuturePopulationDivisionSumSD_40, femaleFutureAdmissionDivisionSumMean_40,femaleFutureAdmissionDivisionSumSD_40, femaleFutureReleaseDivisionSumMean_40,femaleFutureReleaseDivisionSumSD_40 = modelAndProjectPopulation_NSamples(femalePrisonerPopulationDivisionSum, femaleAdmissionDivisionSum,femaleReleaseDivisionSum, annualSentence, 40, 2050, 250)
In [42]:
def plotFederalProjection_GenderPopulation(population,futurePopulation,futurePopulation_error, admission,futureAdmission,futureAdmission_error, release,futureRelease,futureRelease_error, sex,initialMu,maxPopulation):
        
    pd.concat([population, futurePopulation]).plot(color="darkorange", label="population")
    plt.fill_between(futurePopulation.index, futurePopulation - futurePopulation_error, futurePopulation + futurePopulation_error, color="darkorange", alpha=0.25)
        
    pd.concat([admission, futureAdmission]).plot(color="crimson", label="admission")
    plt.fill_between(futureAdmission.index, futureAdmission - futurePopulation_error, futureAdmission + futureAdmission_error, color="crimson", alpha=0.25)

    pd.concat([release, futureRelease]).plot(color="steelblue", label="release")
    plt.fill_between(futureRelease.index, futureRelease - futureRelease_error, futureRelease + futureRelease_error, color="steelblue", alpha=0.25)
    
    plt.axvline(1995, color="grey")
    
    plt.legend(loc="upper left")
    plt.ylim([0, maxPopulation])
    plt.ylabel("Head Count")
    plt.title("Projection of Federal " + sex + " Prisoner Population, mu[1994]=" + str(initialMu))
    
    plt.show()
In [43]:
def plotRegionalProjection_GenderPopulation(population,futurePopulation,futurePopulation_error, sex,initialMu,maxPopulation):
    
    population = population.sort_index().drop("Federal").transpose()
    futurePopulation = futurePopulation.sort_index().drop("Federal").transpose()
    futurePopulation_error = futurePopulation_error.drop("Federal").sort_index().transpose()
    
    pd.concat([population, futurePopulation]).plot()
    
    plt.gca().set_prop_cycle(None)
    for i in range(population.shape[1]):
        plt.fill_between(futurePopulation.index, futurePopulation.iloc[:, i] - futurePopulation_error.iloc[:, i], futurePopulation.iloc[:, i] + futurePopulation_error.iloc[:, i], alpha=0.25)
    
    plt.axvline(1995, color="grey")
    
    plt.legend(loc="upper left")
    plt.ylim([0, maxPopulation])
    plt.ylabel("Head Count")
    plt.title("Projection of Regional " + sex + " Prisoner Population, mu[1994]=" + str(initialMu))
    
    plt.show()
In [44]:
def alignDivisionTable(table, divisionTable, regions):
    
    regions = regions.set_index("State")

    divisionTable = divisionTable.drop("Federal")
    divisionTable = divisionTable.sort_index()
    
    table = regions.join(table).reset_index().set_index(["Region","Division", "State"]).sort_index()
    
    return table, divisionTable
In [45]:
def plotDivisionalPopulationProjection_GenderPopulation(population,futurePopulation,futurePopulation_error, divisionalPopulation,futureDivisionalPopulation,futureDivisionalPopulation_error, sex,initialMu):

    population, divisionalPopulation = alignDivisionTable(population, divisionalPopulation, regions)

    futurePopulation, futureDivisionalPopulation = alignDivisionTable(futurePopulation, futureDivisionalPopulation, regions)
    futurePopulation_error, futureDivisionalPopulation_error = alignDivisionTable(futurePopulation_error, futureDivisionalPopulation_error, regions)
    
    regionList = population.index.get_level_values(0).unique()
    for region in regionList:
    
        divisionalPopulation_1Region = divisionalPopulation.loc[region]
        futureDivisionalPopulation_1Region = futureDivisionalPopulation.loc[region]
        futureDivisionalPopulation_error_1Region = futureDivisionalPopulation_error.loc[region]
    
        plt.figure()
        pd.concat([divisionalPopulation_1Region, futureDivisionalPopulation_1Region], axis=1).transpose().plot()
        plt.legend(loc="upper left")
        plt.gca().set_prop_cycle(None)
        for index, row in futureDivisionalPopulation_1Region.iterrows():
            plt.fill_between(row.index.values, row - futureDivisionalPopulation_error_1Region.loc[index], row + futureDivisionalPopulation_error_1Region.loc[index], alpha=0.25)
        plt.axvline(1995, color="grey")
        plt.ylabel("Population")
        plt.title(region + " " + sex + " Population, mu[1994]=" + str(initialMu))
    
        population_1Region = population.loc[region]
        futurePopulation_1Region = futurePopulation.loc[region]
        futurePopulation_error_1Region = futurePopulation_error.loc[region]
    
        divisions = population_1Region.index.get_level_values(0).unique()
        for division in divisions:
        
            population_1Division = population_1Region.loc[division]
            futurePopulation_1Division = futurePopulation_1Region.loc[division]
            futurePopulation_error_1Division = futurePopulation_error_1Region.loc[division]
        
            plt.figure()
            pd.concat([population_1Division, futurePopulation_1Division], axis=1).transpose().plot()
            plt.legend(loc="upper left")
            plt.gca().set_prop_cycle(None)
            for index, row in futurePopulation_1Division.iterrows():
                plt.fill_between(row.index.values.astype(int), row - futurePopulation_error_1Division.loc[index], row + futurePopulation_error_1Division.loc[index], alpha=0.25)
            plt.ylim(bottom=0)
            plt.axvline(1995, color="grey")
            plt.ylabel("Population")
            plt.title(region + " -- " + division + " " + sex + " Population, mu[1994]=" + str(initialMu))

Here, a series of initial state ($\mu[1994] = 5, 10, 20, 40$) is used to solve the latent variables and to estimate the parameters for both the male and female facilities, following which 250 simulations are generated and averaged to build a projection with standard deviaion plotted as shaded areas.

In [46]:
plotFederalProjection_GenderPopulation(malePrisonerPopulation.loc["Federal"], maleFuturePopulationMean_5.loc["Federal"],maleFuturePopulationSD_5.loc["Federal"],maleAdmission.loc["Federal"], maleFutureAdmissionMean_5.loc["Federal"],maleFutureAdmissionSD_5.loc["Federal"], maleRelease.loc["Federal"], maleFutureReleaseMean_5.loc["Federal"],maleFutureReleaseSD_5.loc["Federal"], "Male", 5, 210000)
plotRegionalProjection_GenderPopulation(malePrisonerPopulationRegionSum, maleFuturePopulationRegionSumMean_5,maleFuturePopulationRegionSumSD_5, "Male", 5, 800000)
plotDivisionalPopulationProjection_GenderPopulation(malePrisonerPopulation,maleFuturePopulationMean_5,maleFuturePopulationSD_5, malePrisonerPopulationDivisionSum,maleFuturePopulationDivisionSumMean_5,maleFuturePopulationDivisionSumSD_5, "Male",5)
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
In [47]:
plotFederalProjection_GenderPopulation(malePrisonerPopulation.loc["Federal"], maleFuturePopulationMean_10.loc["Federal"],maleFuturePopulationSD_10.loc["Federal"],maleAdmission.loc["Federal"], maleFutureAdmissionMean_10.loc["Federal"],maleFutureAdmissionSD_10.loc["Federal"], maleRelease.loc["Federal"], maleFutureReleaseMean_10.loc["Federal"],maleFutureReleaseSD_10.loc["Federal"], "Male", 10, 210000)
plotRegionalProjection_GenderPopulation(malePrisonerPopulationRegionSum, maleFuturePopulationRegionSumMean_10,maleFuturePopulationRegionSumSD_10, "Male", 10, 800000)
plotDivisionalPopulationProjection_GenderPopulation(malePrisonerPopulation,maleFuturePopulationMean_10,maleFuturePopulationSD_10, malePrisonerPopulationDivisionSum,maleFuturePopulationDivisionSumMean_10,maleFuturePopulationDivisionSumSD_10, "Male",10)
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
In [48]:
plotFederalProjection_GenderPopulation(malePrisonerPopulation.loc["Federal"], maleFuturePopulationMean_20.loc["Federal"],maleFuturePopulationSD_20.loc["Federal"],maleAdmission.loc["Federal"], maleFutureAdmissionMean_20.loc["Federal"],maleFutureAdmissionSD_20.loc["Federal"], maleRelease.loc["Federal"], maleFutureReleaseMean_20.loc["Federal"],maleFutureReleaseSD_20.loc["Federal"], "Male", 20, 210000)
plotRegionalProjection_GenderPopulation(malePrisonerPopulationRegionSum, maleFuturePopulationRegionSumMean_20,maleFuturePopulationRegionSumSD_20, "Male", 20, 800000)
plotDivisionalPopulationProjection_GenderPopulation(malePrisonerPopulation,maleFuturePopulationMean_20,maleFuturePopulationSD_20, malePrisonerPopulationDivisionSum,maleFuturePopulationDivisionSumMean_20,maleFuturePopulationDivisionSumSD_20, "Male",20)
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
In [49]:
plotFederalProjection_GenderPopulation(malePrisonerPopulation.loc["Federal"], maleFuturePopulationMean_40.loc["Federal"],maleFuturePopulationSD_40.loc["Federal"],maleAdmission.loc["Federal"], maleFutureAdmissionMean_40.loc["Federal"],maleFutureAdmissionSD_40.loc["Federal"], maleRelease.loc["Federal"], maleFutureReleaseMean_40.loc["Federal"],maleFutureReleaseSD_40.loc["Federal"], "Male", 40, 210000)
plotRegionalProjection_GenderPopulation(malePrisonerPopulationRegionSum, maleFuturePopulationRegionSumMean_40,maleFuturePopulationRegionSumSD_40, "Male", 40, 800000)
plotDivisionalPopulationProjection_GenderPopulation(malePrisonerPopulation,maleFuturePopulationMean_40,maleFuturePopulationSD_40, malePrisonerPopulationDivisionSum,maleFuturePopulationDivisionSumMean_40,maleFuturePopulationDivisionSumSD_40, "Male",40)
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>

The projection of federal prisoner population predicts that the male population will reach a steady state soon following the current decrease that started in mid-2010s. Among the four regions, the prisoner population of all regions but the South has entered a stable period.

In the East North Central division of the Midwest, we can expect a continuous decrease in prisoner population before it starts to stabilize in early 2020s. This decline is mostly due to the decrease of prisoner population in Illinois.

In the Northeast, the current contraction of Mid-Atlantic prisoner population will terminate by 2020, entering a slow growth which is mostly driven by a constant increase of Pennsylvania’s prisoner population.

In the south, both South Atlantic and West South Central can see a slight but continuous growth of prisoner population. In the East South Central division, Kentucky will experience a surge of growth which then transformed into a period of slow but steady development. In the West South Central, Texas continues to lead a sustained increase of prisoner population. In the south Atlantic, the current shrinkage of Florida’s prisoner body will soon enter a period very moderate but elongated expansion.

In the West, both Mountain and Pacific divisions will witness continuous but stagnant increments in their prisoner population. The increase of prisoner population in California will started to dwindle by mid-2020s, while we may expect a volatile upsurge in Washington before it is substituted with a much milder inflation.

In [50]:
plotFederalProjection_GenderPopulation(femalePrisonerPopulation.loc["Federal"], femaleFuturePopulationMean_5.loc["Federal"],femaleFuturePopulationSD_5.loc["Federal"],femaleAdmission.loc["Federal"], femaleFutureAdmissionMean_5.loc["Federal"],femaleFutureAdmissionSD_5.loc["Federal"], femaleRelease.loc["Federal"], femaleFutureReleaseMean_5.loc["Federal"],femaleFutureReleaseSD_5.loc["Federal"], "Female", 5, 18000)
plotRegionalProjection_GenderPopulation(femalePrisonerPopulationRegionSum, femaleFuturePopulationRegionSumMean_5,femaleFuturePopulationRegionSumSD_5, "Female", 5, 55000)
plotDivisionalPopulationProjection_GenderPopulation(femalePrisonerPopulation,femaleFuturePopulationMean_5,femaleFuturePopulationSD_5, femalePrisonerPopulationDivisionSum,femaleFuturePopulationDivisionSumMean_5,femaleFuturePopulationDivisionSumSD_5, "Female",5)
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
In [51]:
plotFederalProjection_GenderPopulation(femalePrisonerPopulation.loc["Federal"], femaleFuturePopulationMean_10.loc["Federal"],femaleFuturePopulationSD_10.loc["Federal"],femaleAdmission.loc["Federal"], femaleFutureAdmissionMean_10.loc["Federal"],femaleFutureAdmissionSD_10.loc["Federal"], femaleRelease.loc["Federal"], femaleFutureReleaseMean_10.loc["Federal"],femaleFutureReleaseSD_10.loc["Federal"], "Female", 10, 18000)
plotRegionalProjection_GenderPopulation(femalePrisonerPopulationRegionSum, femaleFuturePopulationRegionSumMean_10,femaleFuturePopulationRegionSumSD_10, "Female", 10, 55000)
plotDivisionalPopulationProjection_GenderPopulation(femalePrisonerPopulation,femaleFuturePopulationMean_10,femaleFuturePopulationSD_10, femalePrisonerPopulationDivisionSum,femaleFuturePopulationDivisionSumMean_10,femaleFuturePopulationDivisionSumSD_10, "Female",10)
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
In [52]:
plotFederalProjection_GenderPopulation(femalePrisonerPopulation.loc["Federal"], femaleFuturePopulationMean_20.loc["Federal"],femaleFuturePopulationSD_20.loc["Federal"],femaleAdmission.loc["Federal"], femaleFutureAdmissionMean_20.loc["Federal"],femaleFutureAdmissionSD_20.loc["Federal"], femaleRelease.loc["Federal"], femaleFutureReleaseMean_20.loc["Federal"],femaleFutureReleaseSD_20.loc["Federal"], "Female", 20, 18000)
plotRegionalProjection_GenderPopulation(femalePrisonerPopulationRegionSum, femaleFuturePopulationRegionSumMean_20,femaleFuturePopulationRegionSumSD_20, "Female", 20, 55000)
plotDivisionalPopulationProjection_GenderPopulation(femalePrisonerPopulation,femaleFuturePopulationMean_20,femaleFuturePopulationSD_20, femalePrisonerPopulationDivisionSum,femaleFuturePopulationDivisionSumMean_20,femaleFuturePopulationDivisionSumSD_20, "Female",20)
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
In [53]:
plotFederalProjection_GenderPopulation(femalePrisonerPopulation.loc["Federal"], femaleFuturePopulationMean_40.loc["Federal"],femaleFuturePopulationSD_40.loc["Federal"],femaleAdmission.loc["Federal"], femaleFutureAdmissionMean_40.loc["Federal"],femaleFutureAdmissionSD_40.loc["Federal"], femaleRelease.loc["Federal"], femaleFutureReleaseMean_40.loc["Federal"],femaleFutureReleaseSD_40.loc["Federal"], "Female", 40, 18000)
plotRegionalProjection_GenderPopulation(femalePrisonerPopulationRegionSum, femaleFuturePopulationRegionSumMean_40,femaleFuturePopulationRegionSumSD_40, "Female", 40, 55000)
plotDivisionalPopulationProjection_GenderPopulation(femalePrisonerPopulation,femaleFuturePopulationMean_40,femaleFuturePopulationSD_40, femalePrisonerPopulationDivisionSum,femaleFuturePopulationDivisionSumMean_40,femaleFuturePopulationDivisionSumSD_40, "Female",40)
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>

Contradicted to the steady projected growth in the federal female prisoner population, all four regions can expect a sustained decrease in their female prisoner population. Although in most states, the female prisoner population will either maintain the current level, if not very slight increase, or experience a developing shrinkage. Pennsylvania, Alabama, North Carolina, West Virginia, Delaware, Texas, Nevada, New Mexico and Montana can expect to see a significant increase in their female prisoner population.

Projection of prison capacity

The simulated population projections are divided by the capacity of the prison system to reflect a forecast of facility occupancy if no new prison is to be built in the near future. The capacity data used here are the newest available, surveyed in 2016. Again, 250 simulations have been generated, and the mean is plotted in the following figures. The group of projections are then used determine the timing when the occupancy rate will cross a certain threshold through a one-sample ttest with significance level set as 0.05.

In [54]:
def modelAndProjectCapacity_NSamples(populationTable, admissionTable,releaseTable, annualSentence, capacity, initialMu, projectionYear, N):
    
    areas = populationTable.index
    
    futureOccupancies = {}
    
    futureOccupancyMean = []
    futureOccupancySD = []
    
    for area in areas:
        
        if area in capacity.index:
        
            futurePopulation, _,_ = modelAndProject_NSamples_OneArea(populationTable, admissionTable,releaseTable, area, annualSentence, initialMu, projectionYear, N)
#             futurePopulation.sort_index(inplace=True)
            futureOccupancy = futurePopulation / capacity[area]
        
            futureOccupancies[area] = futureOccupancy
            
            occupancyMean = futureOccupancy.mean(axis=1)
            occupancyMean.index.name = "Year"
            occupancyMean.name = area
            
            occupancySD = futureOccupancy.std(axis=1)
            occupancySD.index.name = "Year"
            occupancySD.name = area
            
            futureOccupancyMean.append(occupancyMean)
            futureOccupancySD.append(occupancySD)
            
    futureOccupancyMean = pd.concat(futureOccupancyMean, axis=1).transpose()
    futureOccupancyMean.index.name = "Jurisdiction"
    
    futureOccupancySD = pd.concat(futureOccupancySD, axis=1).transpose()
    futureOccupancySD.index.name = "Jurisdiction"
        
    return futureOccupancies, futureOccupancyMean,futureOccupancySD
In [55]:
futureOccupancies_5, futureOccupancyMean_5,futureOccupancySD_5 = modelAndProjectCapacity_NSamples(totalPrisonerPopulation, totalAdmission,totalRelease, annualSentence, capacity, 5, 2050, 250)
futureOccupancies_10, futureOccupancyMean_10,futureOccupancySD_10 = modelAndProjectCapacity_NSamples(totalPrisonerPopulation, totalAdmission,totalRelease, annualSentence, capacity, 10, 2050, 250)
futureOccupancies_20, futureOccupancyMean_20,futureOccupancySD_20 = modelAndProjectCapacity_NSamples(totalPrisonerPopulation, totalAdmission,totalRelease, annualSentence, capacity, 20, 2050, 250)
futureOccupancies_40, futureOccupancyMean_40,futureOccupancySD_40 = modelAndProjectCapacity_NSamples(totalPrisonerPopulation, totalAdmission,totalRelease, annualSentence, capacity, 40, 2050, 250)
In [56]:
def orderCapacityByRegion(table, regions):

    table = futureOccupancyMean_5
    regions = regions.set_index("State")

    table = pd.concat((regions, table), axis=1)
    
    table.index.name = "State"
    table.loc["Federal","Region"] = "Federal"
    table.loc["Federal","Division"] = "Federal"
    table = table.dropna(axis=0)
    table = table.reset_index().set_index(["Region", "Division", "State"]).sort_index()
    
    table.columns.name = "Year"
    table.columns = table.columns.astype(int)
    
    return table
In [57]:
def plotOccupancyProjection(occupancy, occupancy_error, initialMu):
    
    occupancy = orderCapacityByRegion(occupancy, regions)
    occupancy_error = orderCapacityByRegion(occupancy_error, regions)
    
    for region in occupancy.index.get_level_values(0).unique():
        for division in occupancy.loc[region].index.get_level_values(0).unique():
            occupancy_1Division = occupancy.loc[region, division]
            occupancy_error_1Division = occupancy_error.loc[region, division]
        
            plt.figure()
            occupancy_1Division.transpose().plot()
            plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), fancybox=True, shadow=True, ncol=occupancy_1Division.shape[0])
#             plt.gca().set_prop_cycle(None)
#             for i, row in occupancy_1Division.iterrows():
#                 row_error = occupancy_error_1Division.loc[i]
#                 plt.fill_between(row.index, row - row_error, row + row_error, alpha=0.1)
            if region != "Federal":
                plt.title(region + " -- " + division + " Occupancy Projection, mu[1994]=" + str(initialMu))
            else:
                plt.title("Federal Occupancy Projection, mu[1994]=" + str(initialMu))
            plt.ylabel("Occupancy")
In [58]:
plotOccupancyProjection(futureOccupancyMean_5,futureOccupancySD_5, 5)
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
In [59]:
plotOccupancyProjection(futureOccupancyMean_10,futureOccupancySD_10, 10)
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
In [60]:
plotOccupancyProjection(futureOccupancyMean_20,futureOccupancySD_20, 20)
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
In [61]:
plotOccupancyProjection(futureOccupancyMean_20,futureOccupancySD_40, 40)
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>
<Figure size 720x360 with 0 Axes>

The federal prison occupancy will reach its peak level just shy of 150% by early 2020s, before entering a decrease. While most states will suffer a moderate increase of prison occupancy and be forced to function at a relatively over-crowded capacity if no new facility is to be built, the prison occupancy rate of Vermont will rise very drastically to above 500% by 2025. On the other hand, we can expect the occupancy in Illinois, Minnesota and North Dakota to reduce significantly to the level below 100%.